///*********************************************************////
///*********************************************************////
///******           create estimation_file     ************////
///*********************************************************////
///*********************************************************////



/* Build one dataset with the LSS results: the mean LSS score per
   country-year-leader (speech-year) for each of the three dimensions,
   then combine the three dimensions into TextAnalyses/results/lss_all. */

/* CSV stem and the matching output variable / .dta stem, position by position
   (the national CSV is stored under the shorter name lss_nation) */
local lss_csv "lss_econ lss_fight lss_national"
local lss_out "lss_econ lss_fight lss_nation"

forvalues i = 1/3 {
    local csv : word `i' of `lss_csv'
    local out : word `i' of `lss_out'

    clear
    insheet using "TextAnalyses/results/`csv'.csv", comma
    /* lss is imported as a string only when the CSV contains NA entries;
       without this guard a file with no NA (lss already numeric) would stop
       the script with a type mismatch on the string comparison */
    capture confirm string variable lss
    if !_rc {
        replace lss="" if lss=="NA"
        destring lss, replace
    }
    collapse (mean) lss, by(country year name)
    ren lss `out'
    save "TextAnalyses/results/`out'.dta", replace
}

/* Combine the three dimensions. Each file is unique on country-year-name, so
   name is part of the merge key: matching on country-year alone would pair
   rows arbitrarily whenever a country-year has more than one leader. */
clear
use TextAnalyses/results/lss_econ
sort country year name
merge country year name using TextAnalyses/results/lss_fight, nokeep
drop _merge
save TextAnalyses/results/lss_all, replace

sort country year name
merge country year name using TextAnalyses/results/lss_nation, nokeep
drop _merge
sort country year
save TextAnalyses/results/lss_all, replace


/* Dictionary results as a Stata file.
   Step 1: convert the dictionary counts CSV to .dta, sorted on document. */
insheet using "TextAnalyses/results/DICT_counts.csv", comma clear
sort document
save TextAnalyses/results/dict, replace

/* Step 2: attach the counts to the document id file (only documents present
   in the id file are retained), keep the id fields and dictionary shares,
   and overwrite dict with the result sorted for the country-year merge. */
use TextAnalyses/results/id, clear
sort document
merge document using TextAnalyses/results/dict, nokeep
drop _merge
keep document country year type date ///
     total_size econ_share fight_share patriot_share pos_share neg_share
sort country year
save TextAnalyses/results/dict, replace


/* Combine all text results (LSS, dictionary, topics, sentiment) into
   TextAnalyses/results/texts. */

/* LSS scores + dictionary shares, matched on country-year */
use TextAnalyses/results/lss_all, clear
sort country year
merge country year using TextAnalyses/results/dict, nokeep
drop _merge
save TextAnalyses/results/texts, replace

/* topic proportions, matched on document; gamma8, gamma14, gamma24 and
   gamma32 are the only gamma columns not in the drop list below */
sort document
merge document using TextAnalyses/results/topics35, nokeep
drop _merge
drop gamma1 gamma2 gamma3 gamma4 gamma5 gamma6 gamma7 ///
     gamma9 gamma10 gamma11 gamma12 gamma13 ///
     gamma15 gamma16 gamma17 gamma18 gamma19 gamma20 gamma21 gamma22 gamma23 ///
     gamma25 gamma26 gamma27 gamma28 gamma29 gamma30 gamma31 ///
     gamma33 gamma34 gamma35
save TextAnalyses/results/texts, replace

/* sentiment counts: CSV -> .dta sorted on document */
insheet using "TextAnalyses/results/sentiment_counts.csv", comma clear
sort document
save TextAnalyses/results/sentiment, replace

/* attach sentiment counts and compute the insult share of freq */
use TextAnalyses/results/texts, clear
sort document
merge document using TextAnalyses/results/sentiment, nokeep
drop _merge
gen insult_share=insult/freq

/* texts is the combined file of all text results */
save TextAnalyses/results/texts, replace



////*****************************************************///
///*** add explanatory variables to estimation_file  ***///
///*****************************************************///

/* Assemble data/vars/variables: the country-year file of explanatory
   variables, built by successive merges onto data/vars/vars. Each stage is
   saved so that a failed run leaves the last completed stage on disk. */

clear
use data/vars/vars
sort iso3n year
merge iso3n year using data/vars/more, nokeep
drop _merge
sort country year
save data/vars/variables, replace

/* economic variables */
sort country year
merge country year using data/vars/econ, nokeep
drop _merge
sort country year
save data/vars/variables, replace

/* protest data (mm_postsoviet_year), matched on ccode-year.
   The earlier version also set maxprotesters to 0 where maxnumprotests==0
   immediately before dropping maxprotesters; that statement could not affect
   the saved data and has been removed.
   NOTE(review): if the intent was to recode another variable, restore it here. */
sort ccode year
merge ccode year using data/vars/mm_postsoviet_year, nokeep
drop _merge maxviolentrespond maxdemand maxprotesters
sort country year
save data/vars/variables, replace

/* interstate disputes (MIDB_postsoviet_year), matched on ccode-year */
sort ccode year
merge ccode year using data/vars/MIDB_postsoviet_year, nokeep
/* replace as 0s for all except nonrecognized states */
replace sumdispute =0 if sumdispute==. & unrecognized!=1
replace midb_event=0 if midb_event==. & unrecognized!=1
drop _merge sumuseofforce sumforcedisplay maxuseofforce maxforcedisplay maxhiact maxfatality maxrevstate maxsidea stabb
sort country year
save data/vars/variables, replace

/* Freedom House: recode the three country names to the short codes used as
   country in the rest of the data; the file is overwritten in place (running
   this twice is harmless because the long names no longer occur) */
clear
use data/vars/freedomhouse
replace country="NKR" if country=="Nagorno-Karabakh"
replace country="SO" if country=="South Ossetia"
replace country="PMR" if country=="Transnistria"
sort country year
save data/vars/freedomhouse, replace

clear
use data/vars/variables
sort country year
merge country year using data/vars/freedomhouse, nokeep
drop _merge
sort country year
/* variables not used downstream (xconst was listed twice before) */
drop v2svstterr tradepr financialpr travelpr v2x_horacc v2xnp_pres polconiii polconv latent_personalism ///
     xconst gdpgrowth gdp2010const gdpgrowthl1 gdppcgrowthl1
save data/vars/variables, replace

/* merge all text results file with independent variables' file */
clear
use TextAnalyses/results/texts
sort country year
merge country year using data/vars/variables, nokeep
drop _merge

save estimation_file, replace

/* more variables (the data just saved is still in memory) */

/* Kuchma's first year is 1994. The correction is applied before anything is
   derived from starty; previously timeinoffice was computed from the
   uncorrected value. */
replace starty=1994 if name=="Kuchma"

/* time in office */
gen timeinoffice=year-starty

/* two data entry errors, replace */
replace preselec=0 if country=="NKR" & year==2015
replace preselec=1 if country=="Azerbaijan" & year==2008

/* election or referendum in the year */
gen elecrefer=(preselec==1 | referendum==1)

/* autocracy: 1 when v2x_regime is 0, missing when v2x_regime is missing */
gen autocracy=(v2x_regime==0) if v2x_regime!=.

gen loggdp=log(gdp2010constl1)

/* color revolution: year-month windows based on the month of the speech date.
   A missing month compares as larger than any number, so speeches with a
   missing date in these years fall into the "month>" windows. */
gen month= month(date)
gen colorrev=0
replace colorrev=1 if year==2000 & month>9
replace colorrev=1 if year==2001 & month>4
replace colorrev=1 if year==2003 & month>10
replace colorrev=1 if year==2004 & (month<6 | month>10)
replace colorrev=1 if year==2005 & month<7
replace colorrev=1 if year==2010 & month==12
replace colorrev=1 if year==2011 & month<7

/* first year in office */
sort name year
gen firstyear=(year==starty)

/* add election dates */
sort country name year
merge country name year using  data/vars/electiondates, nokeep
drop _merge

/* days from the speech date to anydate2 */
gen timeto=anydate2-date
/* in Armenia 2018 it is Pashinyan not Sargsyan */
replace timeto=45 if country=="Armenia" & name=="Pashinyan" & year==2018

/* add background characteristics */
sort name
merge name using data/vars/postsov_leader, nokeep
drop _merge

/* first term: the five years starting at starty, with manual overrides */
sort name year
gen startyplus5=starty+5
gen firstterm=0
replace firstterm=1 if year>=starty & year<startyplus5
replace firstterm=1 if name=="Atambaev" & year==2017
replace firstterm=0 if name=="Bagapsh" & year==2009
replace firstterm=1 if name=="Lukashenko" & year<2001
replace firstterm=1 if name=="Margvelashvili" & year==2018
replace firstterm=1 if name=="Shevardnadze" & year<2000

/* age in years at the speech date; date and dob are daily dates, so the
   difference is divided by the average year length of 365.25 days
   (the earlier divisor of 365.5 slightly understated every age) */
gen age=(date-dob)/365.25
/* diagnostic: implausible ages; a missing age also satisfies age>100 */
list name date dob year if age>100
/* Pashinyan: fall back to calendar years. The earlier date-yob subtracted a
   birth year from a daily date and produced a value in the thousands. */
replace age=year-yob if name=="Pashinyan"
gen age2=year-yob

/* silovik indicator extended to leaders whose career1 is lawyer */
gen silovik2=silovik
replace silovik2=1 if career1=="lawyer"

/* education dummies ed1, ed2, ... in the sorted order of the educ categories;
   ed3, ed4 and ed7 are the ones left after the drop.
   NOTE(review): the numbering shifts if the set of educ categories changes -
   confirm that ed4 is still the intended category after any data update. */
tab educ, gen(ed)
drop ed1 ed2 ed5 ed6 ed8 ed9 ed10 ed11 ed12 ed13 ed14

/* Variable labels for tables and figures. Each variable is labelled once:
   loggdp was previously labelled twice with the same text, and maxsanction
   twice with different text ("Under sanctions", then "Sanctions"); the label
   that was applied last, and therefore in effect, is the one kept. */

/* economy and regime */
label var gdppcgrowth "Economic growth"
label var loggdp "GDP pc, log"
label var autocracy "Closed autocracy"
label var dictatorship "Closed dictatorship"
label var v2xme_altinf "vdem altinf"

/* elections and referenda */
label var preselec "Election year"
label var referendum "Referendum"
label var timeto "Time to election"

/* protests, conflict and external pressure */
label var maxnumprotests "Protests"
label var maxsanction "Sanctions"
label var sumdispute "Interstate disputes"
label var midb_event "Interstate dispute"
label var colorrev "Color revolution"
label var conflict "Military conflict"
label var gaswar "Energy dispute"

/* leader characteristics */
label var silovik2 "Silovik"
label var partysec  "Ex-party sec."
label var ed4 "Engineering degree"
label var firstterm "First term"
label var ethnicrus "Leader ethnic Russian"
/* stray trailing apostrophe removed from this label */
label var age "Leader's age"

/* country groups */
label var unrecognized "Unrecognized state"
label var centralasia "Central Asia"
label var caucasus "Caucasus"

/* Before/after dummies for speeches in an election year (preselec==1) or a
   referendum year (referendum==1) with a known event date. "Before" means
   the speech date is strictly earlier than the event date; "after" covers
   the event date itself and later. */
gen before_elec=(date<preselecdate  & preselec==1 & preselecdate!=.)
gen after_elec =(date>=preselecdate & preselec==1 & preselecdate!=.)
gen before_ref =(date<referendumdate  & referendum==1 & referendumdate!=.)
gen after_ref  =(date>=referendumdate & referendum==1 & referendumdate!=.)
label var before_elec "Before election"
label var after_elec "After election"
label var before_ref "Before referendum"
label var after_ref "After referendum"

/* Executive-power / PTL referenda, coded by country and year.
   NOTE(review): confirm the Belarus 2014 entry against the source coding
   (possibly meant to be 2004). */
gen ptlref=0
replace ptlref=1 if country=="Belarus"    & inlist(year,1996,2014)
replace ptlref=1 if country=="Armenia"    & year==2015
replace ptlref=1 if country=="Azerbaijan" & inlist(year,2002,2009,2016)
replace ptlref=1 if country=="Kazakhstan" & year==1995
replace ptlref=1 if country=="Kyrgyzstan" & inlist(year,2003,2016) /* 2016: exec power increase */
replace ptlref=1 if country=="Russia"     & year==1993
replace ptlref=1 if country=="Tajikistan" & inlist(year,2003,2016)
replace ptlref=1 if country=="Uzbekistan" & year==1991
replace ptlref=1 if country=="Ukraine"    & year==2000
label var ptlref "Exec.power/PTL referendum"

/* helper variables no longer needed */
drop preselecdate2 referendumdate2 anydateall startyplus5

/* dummy variable if text translated or not */
gen inrussian=0
replace inrussian=1 if inlist(country,"Moldova","Georgia","Ukraine")
replace inrussian=0 if country=="Georgia" & inlist(year,2010,2011,2012,2014)
replace inrussian=0 if country=="Ukraine" & year<1994
/* NOTE(review): Ukraine 2012 is already 1 from the country rule above, so
   this line changes nothing - confirm whether 0 was intended */
replace inrussian=1 if country=="Ukraine" & year==2012
replace inrussian=0 if country=="Ukraine" & year==1994 & name=="Kravchuk"
label var inrussian "Translated text"

/* Corrections to type and total_size. These run before logsize is generated;
   previously logsize was computed first, so the Ukraine 2009 correction
   never reached it. */
/* in Belarus in 1996 speech to the assembly not parliament */
replace type=1 if country=="Belarus" & year==1996
/* annual address to the nation in Azerbaijan not parliament */
replace type=1 if country=="Azerbaijan"
/* one error correction */
replace total_size=4379 if country=="Ukraine" & year==2009

label var type "Legislative address"
label var total_size "Text length"
gen logsize=log(total_size)
label var logsize "Text length"

/* variables retained in the main estimation file (type was listed twice) */
keep country year name document type date month ccode iso3n ///
     lss_econ lss_nation lss_fight ///
     total_size logsize econ_share insult_share patriot_share pos_share neg_share ///
     negative positive freq gamma8 gamma14 gamma24 gamma32 ///
     preselec referendum ptlref timeto before_elec before_ref after_elec after_ref ///
     gdppcgrowth loggdp maxnumprotests maxsanction sumdispute midb_event ///
     autocracy dictatorship v2x_regime v2xme_altinf colorrev conflict gaswar ///
     firstterm silovik2 partysec ed4 ethnicrus age dob yob ///
     inrussian centralasia caucasus unrecognized

/* save the main estimation_file */
save estimation_file, replace





///*********************************************************////
///*********************************************************////
///******  create supplementary UNGA estimation_file  *******////
///*********************************************************////
///*********************************************************////


/* UNGA text analysis results.
   LSS scores: convert the CSV to .dta and rename lss and lss2 to the names
   used in the main file (lss_econ, lss_nation). */
insheet using "TextAnalyses/results/lss_all_un.csv", comma clear
save TextAnalyses/results/un_poslania, replace
rename lss lss_econ
rename lss2 lss_nation
sort country year
save TextAnalyses/results/un_poslania, replace

/* sentiment counts: the name column becomes country, the merge key */
insheet using "TextAnalyses/results/lsd_counts_un.csv", comma clear
rename name country
drop session
sort country year
save TextAnalyses/results/lsd_counts_un, replace

/* make one text results file */
use TextAnalyses/results/un_poslania, clear
sort country year
merge country year using TextAnalyses/results/lsd_counts_un, nokeep
drop _merge
sort country year
save TextAnalyses/results/un_poslania, replace

/* merge with independent variables (the data just saved is still in memory) */
sort country year
merge country year using data/vars/unga_vars, nokeep
drop _merge
save estimation_file_unga, replace


/* Derive the variables of the supplementary UNGA estimation file and save
   it back to estimation_file_unga. */
clear
use estimation_file_unga

/* install kountry ado only if it is not installed already; the earlier
   unconditional "ssc install kountry, replace" needed network access on
   every run and failed offline even when the package was present */
capture which kountry
if _rc {
    ssc install kountry
}

/* postsoviet region indicator: kountry converts ccode (COW numeric) to ISO
   numeric in _ISO3N_ and adds the region variable GEO */
kountry ccode, from(cown) to(iso3n) geo(sov)
ren _ISO3N_ iso3n
gen postsoviet=0
replace postsoviet=1 if GEO=="Post Soviet"
/* the three Baltic states are not counted as post-Soviet here */
replace postsoviet=0 if country=="EST" | country=="LTU" | country=="LVA"

/* independent variables */
gen incomepc= rgdpna/pop
/* NOTE(review): base-10 log here, while the main estimation file uses the
   natural log for loggdp under the same label - confirm this is intended */
gen loggdppc=log10(incomepc)
label var loggdppc "GDP pc, log"
/* NOTE(review): a missing v2x_regime is coded 0 here, whereas the main
   estimation file leaves autocracy missing in that case - confirm */
gen autocracy=0
replace autocracy=1 if v2x_regime==0
label var autocracy "Autocracy"
/* replace as 0s for nonconflict */
replace sumdispute =0 if sumdispute==.
gen midb_event=0 if sumdispute==0
replace midb_event=1 if sumdispute>0 & sumdispute!=.
/* color revolution years (whole years, no month windows in this file) */
gen colorrev=0
replace colorrev=1 if inlist(year,2000,2003,2004,2005,2011)

/* labels (v2ddyror was labelled twice with the same text) */
label var v2eltype_6 "Election year"
label var v2ddyror "Referendum"
label var wdigrowth "GDP growth"
label var sumdispute "Interstate disputes"
label var maxsanction "Sanctions"
label var anycamp "Protests"
label var midb_event "Interstate dispute"
label var colorrev "Color revolution"

/* sentiment measure: log ratio of negative to positive counts, with 0.5
   added to each so that zero counts remain defined */
gen negsentiment=log((negative+0.5)/(positive+0.5))

/* numeric country id and panel declaration (country-year) */
encode country, gen(country2)
xtset country2 year

keep country session year doc_idx total_size coldwar iso3n country2 lss_econ lss_nation negsentiment wdigrowth ///
v2eltype_6 v2ddyror  anycamp maxsanction  midb_event loggdppc autocracy colorrev  postsoviet vdem

save estimation_file_unga, replace




///***************************************************************////
///***************************************************************////
///****** create supplementary Russia texts (Appendix Fig 3) *******////
///***************************************************************////
///***************************************************************////


/* Supplementary Russia texts: mean LSS score per document id (name) for each
   dimension, merged onto the Russia id file, plus a Putin indicator. */

foreach dim in econ fight nation {
    import delimited "TextAnalyses/results/lss_`dim'_rus.csv", clear
    /* lss is imported as a string only when the CSV contains NA entries;
       the guard keeps a file with no NA from stopping the script */
    capture confirm string variable lss
    if !_rc {
        replace lss="" if lss=="NA"
        destring lss, replace
    }
    collapse (mean) lss, by(name)
    ren lss lss_`dim'
    save "TextAnalyses/results/lss_`dim'_rus.dta", replace
}

/* id file: ID is renamed to name, the key of the collapsed LSS files
   (a doubled slash in this path was normalised) */
use "TextAnalyses/results/russia_id.dta", clear
ren ID name
foreach dim in econ fight nation {
    sort name
    merge name using TextAnalyses/results/lss_`dim'_rus, nokeep
    drop _merge
}
save TextAnalyses/results/russia_all, replace


/* putin speeches only.
   mdy() takes its arguments as (month, day, year). The earlier cut-offs
   mdy(8,5,2008) and mdy(9,5,2012) therefore meant 5 Aug 2008 and 5 Sep 2012;
   they are read here as day/month swaps of 8 May 2008 and 9 May 2012.
   NOTE(review): confirm the two cut-off dates against the intended coding. */
gen putin=0
replace putin=1 if source=="kremlin" & date < mdy(5,8,2008)
replace putin=1 if source=="kremlin" & date >= mdy(5,9,2012)
replace putin=1 if source=="premier"

save TextAnalyses/results/russia_all, replace







